# Install the 'weights' package only when it is missing, and do so BEFORE
# attaching it: attaching first fails on a machine without the package, and
# reinstalling an already attached package on every run is slow and can leave
# the session in an inconsistent state.
if (!requireNamespace("weights", quietly = TRUE)) {
  install.packages("weights")
}
library("weights")
# Start from an empty global workspace (removes every user-defined object;
# attached packages are not affected).
rm(list=ls())
library("foreign")
library("car")
library("AER")
library("ivpack")
library("survey")
library("sjPlot")
library("sjmisc")
library("ggplot2")
library("patchwork")
library("ggeffects")
library("stargazer")
library("jtools")
library("margins")
library("interactions")
library("effectsize")
library("readstata13")
library("corrplot")
library("plyr")
library("reshape2")
library("xlsx")
library("srvyr")
library("MplusAutomation")
library("lavaan")
library("semTable")
library("xtable")
library("ltm")
library("weights")
###################################################
####################### READ AND PREPARE VARIABLES
###################################################
# Import the Stata export, keeping the raw numeric codes instead of factors.
dat <- read.dta13(
  file = "Citizen-survey_STATA_2021-01-15_g.dta",
  convert.factors = FALSE
)

# Gender dummy: soz1 code 2 becomes 1, codes 1 and 3 become 0.
dat$female <- car::recode(dat$soz1, "1=0; 2=1; 3=0")
# Age relative to 2021 (soz2 is presumably the year of birth -- confirm
# against the codebook).
dat$age <- 2021 - dat$soz2

# Education score 1-4 built from soz9 and soz10. The assignments run from the
# lowest to the highest level, so a later rule overwrites an earlier one.
# Rows with missing codes never match a rule and stay NA.
dat$edu <- NA
dat$edu[dat$soz9 %in% c(1, 2)] <- 1
dat$edu[which(dat$soz9 == 3 | dat$soz10 == 2)] <- 2
dat$edu[which(dat$soz9 == 3 & dat$soz10 == 2)] <- 3
dat$edu[dat$soz10 %in% c(3, 4)] <- 4

# Exact income from inc2; negative values are treated as missing.
dat$income <- replace(dat$inc2, which(dat$inc2 < 0), NA)

# Income bracket 1-6: start from the self-reported bracket (inc3) and, where an
# exact income is available, replace it with the bracket implied by that
# income. Brackets are right-closed: [0, 1000], (1000, 2000], ..., > 5000.
dat$income.groups <- car::recode(dat$inc3, "1=1; 2=2; 3=3; 4=4; 5=5; 6=6; else=NA")
dat$income.groups[!is.na(dat$income)] <- as.numeric(cut(
  dat$income[!is.na(dat$income)],
  breaks = c(-Inf, 1000, 2000, 3000, 4000, 5000, Inf),
  labels = FALSE
))

# Political interest with the 1-5 scale reversed; other codes become NA.
dat$polinterest <- car::recode(dat$pol1, "1=5; 2=4; 3=3; 4=2; 5=1; else=NA")
# pol3 and pol6 with the -99 code set to missing.
dat$ideology <- car::recode(dat$pol3, "-99=NA")
dat$tax.welfare <- car::recode(dat$pol6, "-99=NA")

#####################################################
####################### PREPARE SOCIAL SAMPLING DATA
#####################################################
# Label the three experimental conditions.
dat$sosa.treatment <- car::recode(
  dat$sosa_rand,
  "1 = 'Control';
2 = 'High Estimation';
3 = 'Low Estimation'"
)

# Four outcome items; items b and d are reverse-coded on their 1-6 scale.
dat$sosa.dv1 <- dat$sosa4a
dat$sosa.dv2 <- car::recode(dat$sosa4b, "1=6;2=5;3=4;4=3;5=2;6=1")
dat$sosa.dv3 <- dat$sosa4c
dat$sosa.dv4 <- car::recode(dat$sosa4d, "1=6;2=5;3=4;4=3;5=2;6=1")
# Index over all four items; NA as soon as one item is missing.
dat$sosa.redistribute <- rowMeans(dat[paste0("sosa.dv", 1:4)])

### Recode Percentage to 100%
# Rescale the six bracket shares (sosa3a-f) so that they add up to 100.
dat[paste0("rc_sosa3", letters[1:6])] <-
  dat[paste0("sosa3", letters[1:6])] / dat$sosa3_sum * 100

### Recode Percentage (NA --> 0)
# Missing shares count as 0, except for rows where the share total itself is
# missing; those rows stay NA.
dat[paste0("nona_sosa3", letters[1:6])] <- lapply(
  dat[paste0("rc_sosa3", letters[1:6])],
  car::recode,
  recodes = "NA=0"
)
dat[is.na(dat$sosa3_sum), paste0("nona_sosa3", letters[1:6])] <- NA

### Mean of Income Distribution
# Each bracket gets a representative income: 80% of 1,000 for the lowest
# bracket, the mean of the bracket's integer range for the middle ones, and
# 1.3 times 5,001 for the open top bracket (weights for the lowest and highest
# bracket follow Dawtry et al. (2015)).
# Missing shares are dropped by rowSums(na.rm = TRUE), so it makes no
# difference for the sum whether they are NA or 0.
vec <- do.call(cbind, Map(
  function(bracket.value, share) bracket.value * share / 100,
  c(.80 * 1000, mean(1001:2000), mean(2001:3000),
    mean(3001:4000), mean(4001:5000), 1.3 * 5001),
  dat[paste0("rc_sosa3", letters[1:6])]
))
dat$mean_inc <- rowSums(vec, na.rm = TRUE)
### Rows without a share total, and sums of exactly 0 (log(0) = -Inf), become NA
dat$mean_inc[is.na(dat$sosa3_sum) | dat$mean_inc == 0] <- NA
dat$mean_inc <- log(dat$mean_inc)

# Leave-one-out versions of the outcome index (available items only).
dat$sosa123 <- rowMeans(dat[paste0("sosa.dv", c(1, 2, 3))], na.rm = TRUE)
dat$sosa124 <- rowMeans(dat[paste0("sosa.dv", c(1, 2, 4))], na.rm = TRUE)
dat$sosa134 <- rowMeans(dat[paste0("sosa.dv", c(1, 3, 4))], na.rm = TRUE)
dat$sosa234 <- rowMeans(dat[paste0("sosa.dv", c(2, 3, 4))], na.rm = TRUE)

### Create Survey Data
# Design without clusters (id = ~1), weighted by the Gewicht column.
svydat <- svydesign(id = ~1, data = dat, weights = dat$Gewicht)
rm(list=ls())
library("foreign")
library("car")
library("AER")
library("ivpack")
library("survey")
library("sjPlot")
library("sjmisc")
library("ggplot2")
library("patchwork")
library("ggeffects")
library("stargazer")
library("jtools")
library("margins")
library("interactions")
library("effectsize")
library("readstata13")
library("corrplot")
library("plyr")
library("reshape2")
library("xlsx")
library("srvyr")
library("lavaan")
library("semTable")
library("xtable")
library("ltm")
library("weights")
###################################################
####################### READ AND PREPARE VARIABLES
###################################################
# Import the Stata export, keeping the raw numeric codes instead of factors.
dat <- read.dta13(
  file = "Citizen-survey_STATA_2021-01-15_g.dta",
  convert.factors = FALSE
)

# Gender dummy: soz1 code 2 becomes 1, codes 1 and 3 become 0.
dat$female <- car::recode(dat$soz1, "1=0; 2=1; 3=0")
# Age relative to 2021 (soz2 is presumably the year of birth -- confirm
# against the codebook).
dat$age <- 2021 - dat$soz2

# Education score 1-4 built from soz9 and soz10. The assignments run from the
# lowest to the highest level, so a later rule overwrites an earlier one.
# Rows with missing codes never match a rule and stay NA.
dat$edu <- NA
dat$edu[dat$soz9 %in% c(1, 2)] <- 1
dat$edu[which(dat$soz9 == 3 | dat$soz10 == 2)] <- 2
dat$edu[which(dat$soz9 == 3 & dat$soz10 == 2)] <- 3
dat$edu[dat$soz10 %in% c(3, 4)] <- 4

# Exact income from inc2; negative values are treated as missing.
dat$income <- replace(dat$inc2, which(dat$inc2 < 0), NA)

# Income bracket 1-6: start from the self-reported bracket (inc3) and, where an
# exact income is available, replace it with the bracket implied by that
# income. Brackets are right-closed: [0, 1000], (1000, 2000], ..., > 5000.
dat$income.groups <- car::recode(dat$inc3, "1=1; 2=2; 3=3; 4=4; 5=5; 6=6; else=NA")
dat$income.groups[!is.na(dat$income)] <- as.numeric(cut(
  dat$income[!is.na(dat$income)],
  breaks = c(-Inf, 1000, 2000, 3000, 4000, 5000, Inf),
  labels = FALSE
))

# Political interest with the 1-5 scale reversed; other codes become NA.
dat$polinterest <- car::recode(dat$pol1, "1=5; 2=4; 3=3; 4=2; 5=1; else=NA")
# pol3 and pol6 with the -99 code set to missing.
dat$ideology <- car::recode(dat$pol3, "-99=NA")
dat$tax.welfare <- car::recode(dat$pol6, "-99=NA")

#####################################################
####################### PREPARE SOCIAL SAMPLING DATA
#####################################################
# Label the three experimental conditions.
dat$sosa.treatment <- car::recode(
  dat$sosa_rand,
  "1 = 'Control';
2 = 'High Estimation';
3 = 'Low Estimation'"
)

# Four outcome items; items b and d are reverse-coded on their 1-6 scale.
dat$sosa.dv1 <- dat$sosa4a
dat$sosa.dv2 <- car::recode(dat$sosa4b, "1=6;2=5;3=4;4=3;5=2;6=1")
dat$sosa.dv3 <- dat$sosa4c
dat$sosa.dv4 <- car::recode(dat$sosa4d, "1=6;2=5;3=4;4=3;5=2;6=1")
# Index over all four items; NA as soon as one item is missing.
dat$sosa.redistribute <- rowMeans(dat[paste0("sosa.dv", 1:4)])

### Recode Percentage to 100%
# Rescale the six bracket shares (sosa3a-f) so that they add up to 100.
dat[paste0("rc_sosa3", letters[1:6])] <-
  dat[paste0("sosa3", letters[1:6])] / dat$sosa3_sum * 100

### Recode Percentage (NA --> 0)
# Missing shares count as 0, except for rows where the share total itself is
# missing; those rows stay NA.
dat[paste0("nona_sosa3", letters[1:6])] <- lapply(
  dat[paste0("rc_sosa3", letters[1:6])],
  car::recode,
  recodes = "NA=0"
)
dat[is.na(dat$sosa3_sum), paste0("nona_sosa3", letters[1:6])] <- NA

### Mean of Income Distribution
# Each bracket gets a representative income: 80% of 1,000 for the lowest
# bracket, the mean of the bracket's integer range for the middle ones, and
# 1.3 times 5,001 for the open top bracket (weights for the lowest and highest
# bracket follow Dawtry et al. (2015)).
# Missing shares are dropped by rowSums(na.rm = TRUE), so it makes no
# difference for the sum whether they are NA or 0.
vec <- do.call(cbind, Map(
  function(bracket.value, share) bracket.value * share / 100,
  c(.80 * 1000, mean(1001:2000), mean(2001:3000),
    mean(3001:4000), mean(4001:5000), 1.3 * 5001),
  dat[paste0("rc_sosa3", letters[1:6])]
))
dat$mean_inc <- rowSums(vec, na.rm = TRUE)
### Rows without a share total, and sums of exactly 0 (log(0) = -Inf), become NA
dat$mean_inc[is.na(dat$sosa3_sum) | dat$mean_inc == 0] <- NA
dat$mean_inc <- log(dat$mean_inc)

# Leave-one-out versions of the outcome index (available items only).
dat$sosa123 <- rowMeans(dat[paste0("sosa.dv", c(1, 2, 3))], na.rm = TRUE)
dat$sosa124 <- rowMeans(dat[paste0("sosa.dv", c(1, 2, 4))], na.rm = TRUE)
dat$sosa134 <- rowMeans(dat[paste0("sosa.dv", c(1, 3, 4))], na.rm = TRUE)
dat$sosa234 <- rowMeans(dat[paste0("sosa.dv", c(2, 3, 4))], na.rm = TRUE)

### Create Survey Data
# Design without clusters (id = ~1), weighted by the Gewicht column.
svydat <- svydesign(id = ~1, data = dat, weights = dat$Gewicht)
### Scale diagnostics for the four outcome items (design-weighted)
# Principal components of the four items.
pca <- svyprcomp(~ sosa.dv1 + sosa.dv2 + sosa.dv3 + sosa.dv4, design=svydat)
summary(pca)
# Cronbach's alpha. The result is printed directly because it is a single
# number, for which summary() would only repeat the same value six times.
alpha <- svycralpha(~ sosa.dv1 + sosa.dv2 + sosa.dv3 + sosa.dv4, design=svydat, na.rm=TRUE)
alpha
# Correlations between the items and the four leave-one-out indices,
# including significance statistics.
corr <- svycor(~ sosa.dv1 + sosa.dv2 + sosa.dv3 + sosa.dv4 + sosa123 + sosa124 + sosa134 + sosa234, design=svydat, na.rm=TRUE, sig.stats=TRUE)
corr
### How many respondents entered shares that add up to exactly 100?
# Both outcomes are declared as factor levels so that each label is tied to its
# logical value. Naming the table cells by position would mislabel the count
# (or fail) whenever only one of FALSE/TRUE occurs in the data.
out <- table(factor(dat$sosa3_sum == 100,
                    levels = c(FALSE, TRUE),
                    labels = c("!= 100%", "==100%")))
out
prop.table(out)
### Function to plot income distribution and SOEP distribution
# sosa.dist.plot(sosa.dist)
#
# Draws, per income bracket, the estimated population share by treatment group
# (dodged bars with 95% intervals) and overlays the SOEP population shares as a
# blue line with points.
#
# Arguments:
#   sosa.dist  Data frame (or tibble) with one row per treatment group.
#              Column 1 must be named "sosa.treatment". Columns 2-13 hold, for
#              six income brackets in ascending order, the estimate followed by
#              its standard error. The estimate columns must be named
#              "mean_<bracket>" so that reshape() can pair them with the
#              standard-error columns.
#
# Value:
#   The result of print() on the combined plot; the plot is drawn as a side
#   effect.
#
# Within each row the six estimates are rescaled to sum to 100 and the standard
# errors are rescaled by the same factor. Any number of treatment rows is
# supported.
sosa.dist.plot <- function(sosa.dist) {
  col.mean <- c(2,4,6,8,10,12)
  col.se <- c(3,5,7,9,11,13)
  stopifnot(ncol(sosa.dist) >= max(col.se))
  sosa.dist <- data.frame(sosa.dist)
  colnames(sosa.dist)[col.se] <- c("se_1k", "se_1k_2k", "se_2k_3k",
                                   "se_3k_4k", "se_4k_5k", "se_5k")
  # Rescale every row by the sum of its own estimates. The total is computed
  # once, before either set of columns is overwritten.
  row.total <- rowSums(sosa.dist[, col.mean, drop = FALSE])
  sosa.dist[, col.se] <- sosa.dist[, col.se, drop = FALSE] / row.total * 100
  sosa.dist[, col.mean] <- sosa.dist[, col.mean, drop = FALSE] / row.total * 100
  # reshape() splits column names at "." into measure ("mean"/"se") and bracket.
  colnames(sosa.dist) <- gsub("mean_", "mean.", colnames(sosa.dist))
  colnames(sosa.dist) <- gsub("se_", "se.", colnames(sosa.dist))
  sosa.dist.long <- reshape(sosa.dist,
                            idvar = "sosa.treatment",
                            varying = 2:13,
                            sep = ".",
                            direction = "long")
  colnames(sosa.dist.long) <- c("Treatment", "Income", "Estimate", "se")
  sosa.dist.long$Treatment <- factor(sosa.dist.long$Treatment,
                                     levels=c("Low Estimation", "Control", "High Estimation"))
  # Axis labels for the six brackets, in ascending order.
  income.labels <- c("Up to\n1,000", "1,001 -\n2,000", "2,001 -\n3,000",
                     "3,001 -\n4,000", "4,001 -\n5,000", "5,001\nor more")
  # Map each bracket code to its label by order of first appearance, so the
  # labelling does not depend on the number of treatment rows.
  bracket.codes <- unique(as.character(sosa.dist.long$Income))
  sosa.dist.long$Income <-
    factor(income.labels[match(as.character(sosa.dist.long$Income), bracket.codes)],
           levels = income.labels)
  ### Get income distribution of population (SOEP Data)
  soep.dist <-
    data.frame(Income = income.labels,
               Estimate = c(11.38, 29.68, 24.09, 16.20, 9.43, 9.21))
  #### Plot the histogram
  # colorblind friendly palette
  cbp1 <- c("#0072B2", "#009E73", "#999999", "#56B4E9", "#E69F00",
            "#F0E442", "#D55E00", "#CC79A7")
  sosa.plot <- ggplot(data = sosa.dist.long) +
    geom_bar(aes(x=Income, y=Estimate, fill=Treatment),
             position=position_dodge(width = .9), stat="identity") +
    geom_errorbar(aes(x=Income,
                      ymin=Estimate - 1.96*se,
                      ymax=Estimate + 1.96*se,
                      group = Treatment),
                  width=.1, lwd=1, position = position_dodge(width = .9)) +
    scale_fill_manual(values = cbp1) +
    theme_bw() +
    ylab("Estimate (%)") +
    xlab("Income Group") +
    theme(legend.position="top", legend.title = element_blank())
  # The SOEP layers carry their own data and aesthetics, so they are added
  # directly rather than extracted from a separate throwaway plot.
  print(
    sosa.plot +
      geom_point(data = soep.dist,
                 aes(x=Income, y=Estimate, group=1),
                 color="blue") +
      geom_line(data = soep.dist,
                aes(x=Income, y=Estimate, group=1),
                color="blue") +
      annotate("text", label = "Population Distribution (SOEP data)",
               x = 4.5, y = 27, size = 3, colour = "blue")
  )
}
# plot.main.effect(plot.main, title, title.size, legend.title.size, lab.y, ylim)
#
# Adds point estimates with 95% intervals and a common look to an existing plot.
#
# Arguments:
#   plot.main          ggplot object to extend. The layers added here refer to
#                      the variables Estimate and se, and inherit every other
#                      aesthetic from plot.main.
#   title              Plot title.
#   title.size         Font size of the plot title.
#   legend.title.size  Font size applied to the legend text.
#   lab.y              y-axis label (the x-axis label is left empty).
#   ylim               y-axis limits; elements 1 and 2 are used as lower and
#                      upper limit.
#
# Value: the extended ggplot object.
plot.main.effect <- function(plot.main, title, title.size, legend.title.size, lab.y, ylim) {
  dodge <- position_dodge(width = .3)
  interval.layer <- geom_errorbar(
    aes(ymin = Estimate - 1.96*se, ymax = Estimate + 1.96*se),
    width = .1, lwd = .7, position = dodge
  )
  point.layer <- geom_point(size = 2, fill = "black", position = dodge)
  # All text sizes gathered in one theme() call, applied after theme_bw().
  text.sizes <- theme(
    legend.text = element_text(size = legend.title.size),
    plot.title = element_text(size = title.size),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  )
  # In call position `ylim` resolves to the ggplot2 function, while the
  # arguments refer to the numeric parameter of the same name.
  plot.main +
    interval.layer +
    point.layer +
    theme_bw() +
    text.sizes +
    ylab(lab.y) +
    xlab("") +
    ylim(ylim[1], ylim[2]) +
    ggtitle(title)
}
### Create Survey Data
# srvyr design object weighted by the Gewicht column. It replaces any earlier
# `svydat` object and is built once.
svydat <- dat %>%
  srvyr::as_survey_design(weights = Gewicht)
# Unweighted group sizes per treatment, and a compact overview of the design's
# variables (summary() is used instead of printing the whole data frame).
table(dat$sosa.treatment)
summary(svydat$variables)
### Weighted mean share (with standard error) per income bracket and treatment
# The verbs are called through the srvyr namespace on purpose:
#   * plyr does not export group_by(), so plyr::group_by() stops with an error;
#   * plyr::summarize() has no support for survey designs;
#   * a bare summarize() resolves to whichever attached package defines it
#     last, so its meaning depends on the order of the library() calls.
# Each survey_mean() call yields two columns: the estimate and "<name>_se".
sosa.dist <- svydat %>%
  srvyr::group_by(sosa.treatment) %>%
  srvyr::summarise(mean_1k = survey_mean(rc_sosa3a, na.rm=TRUE),
                   mean_1k_2k = survey_mean(rc_sosa3b, na.rm=TRUE),
                   mean_2k_3k = survey_mean(rc_sosa3c, na.rm=TRUE),
                   mean_3k_4k = survey_mean(rc_sosa3d, na.rm=TRUE),
                   mean_4k_5k = survey_mean(rc_sosa3e, na.rm=TRUE),
                   mean_5k = survey_mean(rc_sosa3f, na.rm=TRUE))
